#!/usr/bin/env python2
# -*- coding:utf-8 -*-

from bs4 import BeautifulSoup as bs
import urllib2
import xlwt
import time
import os


# Absolute directory containing this script, with symlinks resolved.
BASEDIR = os.path.dirname(os.path.realpath(__file__))




def getHtml(url, timeout=30):
    """Fetch the page at ``url`` and return its raw body.

    Args:
        url: Address handed to ``urllib2.urlopen``.
        timeout: Seconds to wait on the connection before giving up, so a
            single unresponsive host cannot stall the caller indefinitely.

    Returns:
        The response body as read from the connection, or an empty string
        when the request fails for any reason (the error is printed).
    """
    try:
        response = urllib2.urlopen(url, timeout=timeout)
        try:
            return response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()
    except Exception as e:
        # Best-effort fetch: report the failure and let the caller decide
        # what to do with an empty page.
        print(str(e))
        return ''


def getTables(html, parser=None):
    """Return all ``<table>`` elements found in an HTML document.

    Args:
        html: Markup to parse.
        parser: Optional BeautifulSoup parser name (for example
            ``'html.parser'``). When omitted, BeautifulSoup chooses the
            parser itself, as it did before this parameter existed.

    Returns:
        The result of ``find_all('table')``, or an empty list when parsing
        fails (the error is printed).
    """
    try:
        soup = bs(html) if parser is None else bs(html, parser)
        return soup.find_all('table')
    except Exception as e:
        print(str(e))
        # Hand back an empty sequence of tables rather than a string so the
        # failure case looks like "no tables found" to callers.
        return []


def getTrs(table):
    """Return every ``<tr>`` element found inside ``table``."""
    return table.find_all('tr')


def getTds(tr):
    """Return every ``<td>`` element found inside the row ``tr``."""
    return tr.find_all('td')


def _parseRow(url, tds):
    """Flatten the cells of one table row into a list that starts with ``url``."""
    row = [url]
    # Use the cell position, not an equality lookup, to find the fourth cell:
    # an equality lookup picks the wrong position when two cells compare equal.
    for position, td in enumerate(tds):
        text = td.get_text()
        if position != 3:
            row.append(text)
            continue
        # The fourth cell is split on ',' and each piece on '='; only the
        # part after the last '=' is kept, converted to float when possible.
        for item in text.split(','):
            value = item.split('=')[-1]
            try:
                value = float(value)
            except ValueError:
                # Not numeric: keep the original text.
                pass
            row.append(value)
    return row


def _collectRows(url):
    """Fetch ``url`` and return the sorted data rows of its last table.

    Returns an empty list when the page yields no table at all.
    """
    tables = getTables(getHtml(url))
    if not tables:
        print('no table found at ' + url)
        return []
    rows = []
    # The table of interest is the last one on the page.
    for tr in getTrs(tables[-1]):
        row = _parseRow(url, getTds(tr))
        # Drop rows that are too short to carry the sort column.
        if len(row) < 8:
            continue
        rows.append(row)
    # Sort descending on the 8th column (storefileSizeMB). Values that could
    # not be converted to float are ordered ahead of the numeric ones, which
    # is the ordering Python 2 applied implicitly to mixed float/text keys.
    rows.sort(key=lambda row: (not isinstance(row[7], float), row[7]),
              reverse=True)
    return rows


def main(urls=None):
    """Export the rs-status region table of each regionserver to an .xls file.

    One worksheet is written per host, holding a title row followed by the
    rows scraped from that host's page, sorted by storefileSizeMB in
    descending order. A host whose page yields no table still gets a
    worksheet containing only the title row, so one failing host does not
    abort the export of the others.

    Args:
        urls: Optional mapping of worksheet name to rs-status URL. When
            omitted, the built-in list of regionservers is used.

    Returns:
        The path of the saved workbook, located in the ``rs-status``
        directory next to this script.
    """
    if urls is None:
        # Built-in mapping of regionserver IP -> rs-status URL.
        urls = {'10.10.1.1': 'http://10.10.1.1:60030/rs-status',
                '10.10.1.2': 'http://10.10.1.2:60030/rs-status',
                '10.10.1.3': 'http://10.10.1.3:60030/rs-status',
                '10.10.1.4': 'http://10.10.1.4:60030/rs-status',
                '10.10.1.5': 'http://10.10.1.5:60030/rs-status',
                '10.10.1.6': 'http://10.10.1.6:60030/rs-status',
                '10.10.1.7': 'http://10.10.1.7:60030/rs-status',
                }

    # Output directory, created on first use.
    path = os.path.join(BASEDIR, 'rs-status')
    if not os.path.exists(path):
        os.makedirs(path)
    # Output file name carries a timestamp so runs do not overwrite each other.
    filename = 'hbase_rc_status_' + time.strftime('%Y%m%d%H%M%S') + '.xls'
    target = os.path.join(path, filename)

    # Column titles; identical for every worksheet.
    title = ['url', 'Region Name', 'start key', 'end key', 'numberOfStores', ' numberOfStorefiles', ' storefileUncompressedSizeMB', ' storefileSizeMB', ' compressionRatio', ' memstoreSizeMB', ' storefileIndexSizeMB',
             ' readRequestsCount', ' writeRequestsCount', ' rootIndexSizeKB', ' totalStaticIndexSizeKB', ' totalStaticBloomSizeKB', ' totalCompactingKVs', ' currentCompactedKVs', ' compactionProgressPct', ' coprocessors']

    wbk = xlwt.Workbook(encoding='utf-8', style_compression=0)
    # A single cell style shared by every cell of every worksheet.
    cellStyle = xlwt.easyxf(
        'font: height 180, name Arial, colour_index black, bold off, italic off; align: wrap on, vert centre, horiz left;')

    # Sorted so the worksheet order is the same on every run.
    for host in sorted(urls):
        lines = [title] + _collectRows(urls[host])
        sheet = wbk.add_sheet(host, cell_overwrite_ok=True)
        for rowIndex, line in enumerate(lines):
            for colIndex, cell in enumerate(line):
                sheet.write(rowIndex, colIndex, cell, cellStyle)

    wbk.save(target)
    print(target)
    return target

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()